#####################################################################################
# Replication files for main text analyses in Communicating the Politics of the Law # 
#####################################################################################
# Loading required packages
library(lfe); library(stargazer); library(tidyverse); library(xtable)

# Loading the data
# The .RData files are loaded one after another into the current environment:
# release level, senator level, paragraph level, abortion paragraph level,
# and abortion document level.
# NOTE(review): the objects used later in this script (releases,
# senator_data_year, senate_para, senate_para_abortion,
# senate_document_abortion) are presumably supplied by these files -- confirm.
for (data_file in c("release_level_data.RData",
                    "senator_level_data.RData",
                    "paragraph_level_data.RData",
                    "abortion_paragraph_level_data.RData",
                    "abortion_release_level_data.RData")) {
  load(data_file)
}

###################################################
# In-text discussion of rate of senator messaging #
###################################################
# Number of distinct senator last names in the senator-year data
n_distinct(senator_data_year[["last.name"]])

# Number of distinct senator last names appearing in the press release data
n_distinct(releases[["last.name"]])

############################################################
# Figure 1: Senatorial Messaging About Supreme Court Cases #
############################################################
# Stacked bar chart: number of press releases issued each year, split by party
releases %>%
  # The independents in the data (King and Sanders) are counted as Democrats
  mutate(Party = ifelse(Party == "Independent", "Democrat", Party)) %>%
  ggplot(aes(x = year, color = Party, fill = Party)) +
  # geom_bar() counts rows, i.e. one unit of bar height per press release
  geom_bar() +
  # Same two colors for bar outline and bar fill
  scale_fill_manual(values = c("blue", "red")) +
  scale_color_manual(values = c("blue", "red")) +
  # One tick per year; fixed vertical range
  scale_x_continuous(breaks = seq(2012, 2024)) +
  scale_y_continuous(limits = c(0, 250)) +
  labs(x = "Year of Issuance",
       y = "Number of Press Releases") +
  theme_minimal() +
  # Remove vertical grid lines
  theme(panel.grid.major.x = element_blank(),
        panel.grid.minor.x = element_blank())

# Writes the most recently created plot to disk
ggsave(filename = "barplot_party_year.pdf", width = 7, height = 5)

###########################################################
# Table 1: Most Discussed Cases in Senator Press Releases #
###########################################################
# Table of the most discussed cases: for each case, the number of press
# releases that mention it and the proportion of those issued by Republicans.
# The result is printed as a LaTeX table.
releases %>%
  # case_name holds one or more case names separated by ", ". Give each case
  # its own row; this works for any number of cases per release, so no case is
  # dropped because a release lists more than a fixed number of names.
  separate_longer_delim(case_name, delim = ", ") %>%
  # Releases without a case name (missing or empty) contribute no rows
  filter(!is.na(case_name), case_name != "") %>%
  # Grouping by case name
  group_by(case_name) %>%
  # Counting how many, proportion Rs
  summarize(n = n(),
            Republican = mean(Party == "Republican")) %>%
  # Cleaning up a few names and making proper formatting:
  # underscores become spaces, "_v_" becomes " v. ", and the name is italicized
  mutate(case_name = gsub("Commerce_v_New_York", "Department_of_Commerce_v_New_York", case_name),
         case_name = gsub("Services_v_Russo", "June_Medical_Services_v_Russo", case_name),
         case_name = gsub("_v_", " v\\. ", case_name),
         case_name = gsub("_", " ", case_name),
         case_name = paste0("\\textit{", case_name, "}")) %>%
  # Keeping cases mentioned 10 or more times
  filter(n >= 10) %>%
  # Arrange from most to least mentions
  arrange(desc(n)) %>%
  # Clean names
  rename(`Prop. Republican` = Republican,
         `Case Name` = case_name,
         `Number of Releases` = n) %>%
  # Table caption and label
  xtable(caption = "Most Discussed Cases in Senator Press Releases, 2012--2024",
         label = "top_cases") %>%
  # sanitize.text.function = identity keeps the LaTeX markup in the case names
  # from being escaped
  print(caption.placement = "top",
        sanitize.text.function = identity)

########################################
# In-text discussion of top cases data #
########################################
### How many press releases discuss one of these top cases? ###
# Top cases (discussed 10 or more times), collapsed into one regular expression
ten_or_more <- releases %>%
  # case_name holds one or more case names separated by ", ". Give each case
  # its own row, however many cases a release lists.
  separate_longer_delim(case_name, delim = ", ") %>%
  # Drop missing/empty names; an empty alternative in the pattern below would
  # match every release
  filter(!is.na(case_name), case_name != "") %>%
  # Counting instances of each case
  count(case_name) %>%
  filter(n >= 10) %>%
  # Extracting the case names
  pull(case_name) %>%
  # Escape regex metacharacters so each name is matched literally
  str_escape() %>%
  # Pasting them together as alternatives
  paste(collapse = "|")

# Require a top case to be a complete entry of the ", "-separated field, so it
# cannot match as a substring of a longer, different case name
ten_or_more <- paste0("(^|, )(", ten_or_more, ")(, |$)")

# Determining number of releases that discuss one of these cases
n_top_case_releases <- releases %>%
  filter(str_detect(case_name, ten_or_more)) %>%
  nrow()
n_top_case_releases

# Proportion of all releases (rounded to three decimals)
round(n_top_case_releases / nrow(releases), 3)

### T-test of difference in Republican messaging rates for liberal and conservative cases ###
# Top cases (discussed 10 or more times), adding in data on whether case was liberal or conservative
messaging_outcome <- releases %>%
  # case_name holds one or more case names separated by ", ". Give each case
  # its own row; this works for any number of cases per release, so no case is
  # dropped because a release lists more than a fixed number of names.
  separate_longer_delim(case_name, delim = ", ") %>%
  # Releases without a case name (missing or empty) contribute no rows
  filter(!is.na(case_name), case_name != "") %>%
  group_by(case_name) %>%
  # Counting how many, proportion Rs
  summarize(n = n(),
            Republican = mean(Party == "Republican")) %>%
  filter(n >= 10) %>%
  # Load in a csv that has whether these top cases were liberal or conservative.
  # No `by` is given, so the join uses every column name the two tables share.
  # NOTE(review): presumably that is only case_name -- confirm against the csv.
  left_join(read_csv("top_cases_final.csv")) %>% 
  # Keep columns on proportion Rs and whether these cases were liberal or conservative
  select(Republican, conservative_decision)

# T-test (compare the conservative proportions and liberal proportions)
# Cases whose conservative_decision is neither 1 nor 0 (e.g. missing after the
# join) enter neither group
test.of.difference <- t.test(messaging_outcome %>% filter(conservative_decision == 1) %>% pull(Republican),
                             messaging_outcome %>% filter(conservative_decision == 0) %>% pull(Republican))
# P-value
round(test.of.difference$p.value, 3)

#############################################################################
# In-text discussion of within-senator variation in senator-level variables #
#############################################################################
# Marginality: number of senators observed with more than one distinct value
# of `marginal` across their rows
senator_data_year %>%
  # One row per (senator, marginal value) combination that occurs in the data
  distinct(last.name, marginal) %>%
  # Number of distinct values each senator takes
  count(last.name) %>%
  # Senators whose value changes at least once
  filter(n > 1) %>%
  nrow()

# Judiciary Committee membership: same calculation for `on_sjc`
senator_data_year %>%
  distinct(last.name, on_sjc) %>%
  count(last.name) %>%
  filter(n > 1) %>%
  nrow()

#########################################
# Table 2: Predicting Senator Messaging #
#########################################
# felm formulas below have four parts separated by "|":
# covariates | fixed effects | instruments (0 = none) | cluster variable

# Outcome: number of messages. Year and state fixed effects, SEs clustered by state
case1 <- felm(
  case_messages ~ republican + republican:post_2018 + majority_party +
    marginal * up_for_election + extremity + on_sjc + senator_lawyer +
    seniority + seniority.squared + freshman + case_count |
    year + two.letter.state.abbreviation | 0 | two.letter.state.abbreviation,
  data = senator_data_year
)
# Outcome: number of messages. Year and senator fixed effects, SEs clustered by senator
case1_b <- felm(
  case_messages ~ majority_party + marginal * up_for_election + on_sjc +
    freshman + case_count |
    year + last.name | 0 | last.name,
  data = senator_data_year
)

# Outcome: log message length. Year and state fixed effects, SEs clustered by state
length1 <- felm(
  log_message_length ~ republican + republican:post_2018 + majority_party +
    marginal * up_for_election + extremity + on_sjc + senator_lawyer +
    seniority + seniority.squared + freshman + case_count |
    year + two.letter.state.abbreviation | 0 | two.letter.state.abbreviation,
  data = senator_data_year
)
# Outcome: log message length. Year and senator fixed effects, SEs clustered by senator
length1_b <- felm(
  log_message_length ~ majority_party + marginal * up_for_election + on_sjc +
    freshman + case_count |
    year + last.name | 0 | last.name,
  data = senator_data_year
)

# Stargazer table of results (LaTeX); the two interaction terms are labelled last
stargazer(case1, case1_b, length1, length1_b,
          title = "Predicting Senator Messaging about Supreme Court Cases, 2012--2024",
          label = "sen_level_felm",
          style = "apsr",
          digits = 2,
          dep.var.labels = c("\\# Messages", "ln(Total Message Length)"),
          covariate.labels = c("Republican", "Majority Party", "Marginal Senator",
                               "Up for Election", "Extremity",
                               "On Judiciary Committee", "Lawyer",
                               "Seniority", "Seniority squared", "Freshman",
                               "Number of Cases from State",
                               "Republican x Post-2018",
                               "Marginal x Up for Election"),
          keep.stat = c("n", "adj.rsq"),
          # Extra rows describing the fixed effects and clustering of each column
          add.lines = list(c("Year FEs", rep("\\checkmark", 4)),
                           c("State FEs", "\\checkmark", "", "\\checkmark", ""),
                           c("Senator FEs", "", "\\checkmark", "", "\\checkmark"),
                           c("SE Cluster", "State", "Senator", "State", "Senator")))

#########################################################################
# In-text discussion of number of senators with messages about abortion #
#########################################################################
# Number of distinct senators with at least one paragraph flagged abortion == 1
num_abortion <- senate_para %>%
  filter(abortion == 1) %>%
  pull(last.name) %>%
  n_distinct()
# Number of distinct senators with any paragraph
denom_abortion <- n_distinct(senate_para[["last.name"]])
# Share of senators with paragraphs about abortion cases (three decimals)
round(num_abortion / denom_abortion, 3)

###########################################
# Table 3: Legal and Legislative Rhetoric #
###########################################
# felm formulas below have four parts separated by "|":
# covariates | fixed effects | instruments (0 = none) | cluster variable

# Paragraph-level model of legal language use; year and state fixed effects,
# SEs clustered by state
para_level <- felm(
  legal_language ~ republican + republican:post_2018 + majority_party +
    marginal * up_for_election + extremity + on_sjc + senator_lawyer +
    seniority + seniority.squared + freshman + log_message_length |
    year + two.letter.state.abbreviation | 0 | two.letter.state.abbreviation,
  data = senate_para_abortion
)

# Paragraph-level model of legislative language use (`act`); same covariates,
# fixed effects, and clustering
para_level2 <- felm(
  act ~ republican + republican:post_2018 + majority_party +
    marginal * up_for_election + extremity + on_sjc + senator_lawyer +
    seniority + seniority.squared + freshman + log_message_length |
    year + two.letter.state.abbreviation | 0 | two.letter.state.abbreviation,
  data = senate_para_abortion
)

# Stargazer table of results (LaTeX); the two interaction terms are labelled last
stargazer(para_level, para_level2,
          title = "Legal and Legislative Rhetoric in Senator Messaging about Abortion Cases",
          label = "abortion_felm_para",
          style = "apsr",
          font.size = "footnotesize",
          digits = 2,
          dep.var.labels = c("Legal Rhetoric", "Legislative Rhetoric"),
          covariate.labels = c("Republican", "Majority Party", "Marginal Senator",
                               "Up for Election", "Extremity",
                               "On Judiciary Committee", "Lawyer",
                               "Seniority", "Seniority squared", "Freshman",
                               "ln(Paragraph Length)",
                               "Republican x Post-2018",
                               "Marginal x Up for Election"),
          keep.stat = c("n", "adj.rsq"),
          # Extra rows describing fixed effects, clustering, and unit of analysis
          add.lines = list(c("Year FEs", rep("\\checkmark", 2)),
                           c("State FEs", rep("\\checkmark", 2)),
                           c("State-clustered SEs", rep("\\checkmark", 2)),
                           c("Unit of Analysis", "Paragraph", "Paragraph")))

##########################################
# In-text discussion of extremity effect #
##########################################
# P-value of extremity effect in legal rhetoric model.
# The cell is looked up by coefficient and column name rather than by position,
# so it stays correct if terms are added to or removed from the model formula.
summary(para_level)$coefficients["extremity", "Pr(>|t|)"]

#########################################################################################
# In-text discussion of number of paragraphs and statements with/without legal rhetoric #
#########################################################################################
# Proportion of paragraphs not using legal language (one minus the mean of
# legal_language, three decimals)
round(1 - mean(senate_para_abortion[["legal_language"]]), 3)
# Proportion of documents not using legal language
round(1 - mean(senate_document_abortion[["legal_language"]]), 3)

# Number of documents with legal_language_prop strictly between 0 and 1, i.e.
# documents containing paragraphs that both do and do not use legal language
senate_document_abortion %>%
  filter(legal_language_prop > 0 & legal_language_prop < 1) %>%
  nrow()
# Total documents about abortion
length(senate_document_abortion$doc_id)

